{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "# 0. Display settings\n",
    "# Show every column when a DataFrame is printed (no column truncation).\n",
    "pd.options.display.max_columns = None\n",
    "# Optional settings, currently disabled:\n",
    "# pd.options.display.max_rows = None  # show all rows, no ellipsis placeholder\n",
    "# pd.options.display.width = 1000\n",
    "# pd.options.display.float_format = '{:.0f}'.format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the training split from the relative data/ directory.\n",
    "X_train = pd.read_csv(filepath_or_buffer=r\"data/train.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>policy_id</th>\n",
       "      <th>age</th>\n",
       "      <th>customer_months</th>\n",
       "      <th>policy_bind_date</th>\n",
       "      <th>policy_state</th>\n",
       "      <th>policy_csl</th>\n",
       "      <th>policy_deductable</th>\n",
       "      <th>policy_annual_premium</th>\n",
       "      <th>umbrella_limit</th>\n",
       "      <th>insured_zip</th>\n",
       "      <th>...</th>\n",
       "      <th>witnesses</th>\n",
       "      <th>police_report_available</th>\n",
       "      <th>total_claim_amount</th>\n",
       "      <th>injury_claim</th>\n",
       "      <th>property_claim</th>\n",
       "      <th>vehicle_claim</th>\n",
       "      <th>auto_make</th>\n",
       "      <th>auto_model</th>\n",
       "      <th>auto_year</th>\n",
       "      <th>fraud</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>122576</td>\n",
       "      <td>37</td>\n",
       "      <td>189</td>\n",
       "      <td>2013-08-21</td>\n",
       "      <td>C</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1465.71</td>\n",
       "      <td>5000000</td>\n",
       "      <td>455456</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>?</td>\n",
       "      <td>54930</td>\n",
       "      <td>6029</td>\n",
       "      <td>5752</td>\n",
       "      <td>44452</td>\n",
       "      <td>Nissan</td>\n",
       "      <td>Maxima</td>\n",
       "      <td>2000</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>937713</td>\n",
       "      <td>44</td>\n",
       "      <td>234</td>\n",
       "      <td>1998-01-04</td>\n",
       "      <td>B</td>\n",
       "      <td>250/500</td>\n",
       "      <td>500</td>\n",
       "      <td>821.24</td>\n",
       "      <td>0</td>\n",
       "      <td>591805</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>YES</td>\n",
       "      <td>50680</td>\n",
       "      <td>5376</td>\n",
       "      <td>10156</td>\n",
       "      <td>37347</td>\n",
       "      <td>Honda</td>\n",
       "      <td>Civic</td>\n",
       "      <td>1996</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>680237</td>\n",
       "      <td>33</td>\n",
       "      <td>23</td>\n",
       "      <td>1996-02-06</td>\n",
       "      <td>B</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1844.00</td>\n",
       "      <td>0</td>\n",
       "      <td>442490</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>NO</td>\n",
       "      <td>47829</td>\n",
       "      <td>4460</td>\n",
       "      <td>9247</td>\n",
       "      <td>33644</td>\n",
       "      <td>Jeep</td>\n",
       "      <td>Wrangler</td>\n",
       "      <td>2002</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>513080</td>\n",
       "      <td>42</td>\n",
       "      <td>210</td>\n",
       "      <td>2008-11-14</td>\n",
       "      <td>A</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>500</td>\n",
       "      <td>1867.29</td>\n",
       "      <td>0</td>\n",
       "      <td>439408</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>YES</td>\n",
       "      <td>68862</td>\n",
       "      <td>11043</td>\n",
       "      <td>5955</td>\n",
       "      <td>53548</td>\n",
       "      <td>Suburu</td>\n",
       "      <td>Legacy</td>\n",
       "      <td>2003</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>192875</td>\n",
       "      <td>29</td>\n",
       "      <td>81</td>\n",
       "      <td>2002-01-08</td>\n",
       "      <td>A</td>\n",
       "      <td>100/300</td>\n",
       "      <td>1000</td>\n",
       "      <td>816.25</td>\n",
       "      <td>0</td>\n",
       "      <td>640575</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>YES</td>\n",
       "      <td>59726</td>\n",
       "      <td>5617</td>\n",
       "      <td>10301</td>\n",
       "      <td>41550</td>\n",
       "      <td>Ford</td>\n",
       "      <td>F150</td>\n",
       "      <td>2004</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>695</th>\n",
       "      <td>1008425</td>\n",
       "      <td>37</td>\n",
       "      <td>196</td>\n",
       "      <td>1997-06-29</td>\n",
       "      <td>C</td>\n",
       "      <td>250/500</td>\n",
       "      <td>500</td>\n",
       "      <td>1301.20</td>\n",
       "      <td>0</td>\n",
       "      <td>474615</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>NO</td>\n",
       "      <td>61433</td>\n",
       "      <td>10436</td>\n",
       "      <td>11432</td>\n",
       "      <td>39745</td>\n",
       "      <td>Nissan</td>\n",
       "      <td>Pathfinder</td>\n",
       "      <td>2011</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>696</th>\n",
       "      <td>770702</td>\n",
       "      <td>43</td>\n",
       "      <td>229</td>\n",
       "      <td>2001-05-29</td>\n",
       "      <td>A</td>\n",
       "      <td>250/500</td>\n",
       "      <td>500</td>\n",
       "      <td>1434.94</td>\n",
       "      <td>8000000</td>\n",
       "      <td>444476</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>?</td>\n",
       "      <td>68623</td>\n",
       "      <td>6798</td>\n",
       "      <td>14557</td>\n",
       "      <td>50606</td>\n",
       "      <td>Volkswagen</td>\n",
       "      <td>Passat</td>\n",
       "      <td>2013</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>697</th>\n",
       "      <td>755099</td>\n",
       "      <td>35</td>\n",
       "      <td>209</td>\n",
       "      <td>2003-01-11</td>\n",
       "      <td>C</td>\n",
       "      <td>100/300</td>\n",
       "      <td>500</td>\n",
       "      <td>1639.46</td>\n",
       "      <td>0</td>\n",
       "      <td>639608</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>YES</td>\n",
       "      <td>58033</td>\n",
       "      <td>9129</td>\n",
       "      <td>4598</td>\n",
       "      <td>40740</td>\n",
       "      <td>Mercedes</td>\n",
       "      <td>C300</td>\n",
       "      <td>2002</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>698</th>\n",
       "      <td>693804</td>\n",
       "      <td>44</td>\n",
       "      <td>275</td>\n",
       "      <td>2003-07-22</td>\n",
       "      <td>B</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>2000</td>\n",
       "      <td>1042.29</td>\n",
       "      <td>0</td>\n",
       "      <td>432061</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NO</td>\n",
       "      <td>35253</td>\n",
       "      <td>7359</td>\n",
       "      <td>3464</td>\n",
       "      <td>24677</td>\n",
       "      <td>Audi</td>\n",
       "      <td>A3</td>\n",
       "      <td>2007</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>699</th>\n",
       "      <td>598086</td>\n",
       "      <td>47</td>\n",
       "      <td>263</td>\n",
       "      <td>1996-08-15</td>\n",
       "      <td>C</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>500</td>\n",
       "      <td>1282.56</td>\n",
       "      <td>0</td>\n",
       "      <td>433809</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NO</td>\n",
       "      <td>24320</td>\n",
       "      <td>2250</td>\n",
       "      <td>4285</td>\n",
       "      <td>18092</td>\n",
       "      <td>Suburu</td>\n",
       "      <td>Forrestor</td>\n",
       "      <td>2008</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>700 rows × 38 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     policy_id  age  customer_months policy_bind_date policy_state policy_csl  \\\n",
       "0       122576   37              189       2013-08-21            C   500/1000   \n",
       "1       937713   44              234       1998-01-04            B    250/500   \n",
       "2       680237   33               23       1996-02-06            B   500/1000   \n",
       "3       513080   42              210       2008-11-14            A   500/1000   \n",
       "4       192875   29               81       2002-01-08            A    100/300   \n",
       "..         ...  ...              ...              ...          ...        ...   \n",
       "695    1008425   37              196       1997-06-29            C    250/500   \n",
       "696     770702   43              229       2001-05-29            A    250/500   \n",
       "697     755099   35              209       2003-01-11            C    100/300   \n",
       "698     693804   44              275       2003-07-22            B   500/1000   \n",
       "699     598086   47              263       1996-08-15            C   500/1000   \n",
       "\n",
       "     policy_deductable  policy_annual_premium  umbrella_limit  insured_zip  \\\n",
       "0                 1000                1465.71         5000000       455456   \n",
       "1                  500                 821.24               0       591805   \n",
       "2                 1000                1844.00               0       442490   \n",
       "3                  500                1867.29               0       439408   \n",
       "4                 1000                 816.25               0       640575   \n",
       "..                 ...                    ...             ...          ...   \n",
       "695                500                1301.20               0       474615   \n",
       "696                500                1434.94         8000000       444476   \n",
       "697                500                1639.46               0       639608   \n",
       "698               2000                1042.29               0       432061   \n",
       "699                500                1282.56               0       433809   \n",
       "\n",
       "     ... witnesses police_report_available total_claim_amount injury_claim  \\\n",
       "0    ...         3                       ?              54930         6029   \n",
       "1    ...         1                     YES              50680         5376   \n",
       "2    ...         1                      NO              47829         4460   \n",
       "3    ...         2                     YES              68862        11043   \n",
       "4    ...         1                     YES              59726         5617   \n",
       "..   ...       ...                     ...                ...          ...   \n",
       "695  ...         3                      NO              61433        10436   \n",
       "696  ...         1                       ?              68623         6798   \n",
       "697  ...         0                     YES              58033         9129   \n",
       "698  ...         0                      NO              35253         7359   \n",
       "699  ...         0                      NO              24320         2250   \n",
       "\n",
       "    property_claim  vehicle_claim   auto_make  auto_model auto_year fraud  \n",
       "0             5752          44452      Nissan      Maxima      2000     0  \n",
       "1            10156          37347       Honda       Civic      1996     0  \n",
       "2             9247          33644        Jeep    Wrangler      2002     0  \n",
       "3             5955          53548      Suburu      Legacy      2003     1  \n",
       "4            10301          41550        Ford        F150      2004     0  \n",
       "..             ...            ...         ...         ...       ...   ...  \n",
       "695          11432          39745      Nissan  Pathfinder      2011     1  \n",
       "696          14557          50606  Volkswagen      Passat      2013     1  \n",
       "697           4598          40740    Mercedes        C300      2002     0  \n",
       "698           3464          24677        Audi          A3      2007     1  \n",
       "699           4285          18092      Suburu   Forrestor      2008     0  \n",
       "\n",
       "[700 rows x 38 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Bare last expression: Jupyter renders the frame through its rich display.\n",
    "X_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def overViewAnalysis(list):\n",
    "    \"\"\"Print a one-table overview of a DataFrame.\n",
    "\n",
    "    Prints the frame's shape, then a table whose rows are, in order: the\n",
    "    column dtypes, the null count per column, the per-column minimum and\n",
    "    maximum, the mean and the standard deviation of the numeric columns\n",
    "    (NaN for the other columns), and finally the first five data rows.\n",
    "    A leading ``new_col`` column labels every row of the table.\n",
    "\n",
    "    Args:\n",
    "        list: The pandas DataFrame to summarise. The name shadows the\n",
    "            builtin ``list``; it is kept so existing callers keep working.\n",
    "\n",
    "    Returns:\n",
    "        None. The overview is written to stdout; the input is not modified.\n",
    "    \"\"\"\n",
    "    frame = list  # local alias so the body does not read like the builtin\n",
    "    preview_rows = 5\n",
    "\n",
    "    print('----------------行数和列数----------------')\n",
    "    print(frame.shape)\n",
    "\n",
    "    # Mean/std only exist for numeric columns; selecting them up front keeps\n",
    "    # a frame without any numeric column from raising.\n",
    "    numeric = frame.select_dtypes(include='number')\n",
    "    labelled_stats = [\n",
    "        ('数据类型', frame.dtypes),\n",
    "        ('空值数量', frame.isnull().sum()),\n",
    "        ('最小值', frame.min()),\n",
    "        ('最大值', frame.max()),\n",
    "        ('平均值', numeric.mean()),\n",
    "        # .std() is the standard deviation, so the row is labelled as such.\n",
    "        ('标准差', numeric.std()),\n",
    "    ]\n",
    "    # reindex aligns each statistic to the frame's column order and fills NaN\n",
    "    # for the columns that statistic does not cover.\n",
    "    stats = pd.DataFrame(\n",
    "        [values.reindex(frame.columns) for _, values in labelled_stats]\n",
    "    )\n",
    "\n",
    "    # Build the table in one concat (DataFrame.append no longer exists in\n",
    "    # pandas 2.x) and label rows by position, which stays correct when the\n",
    "    # frame has fewer than `preview_rows` rows.\n",
    "    preview = frame.head(preview_rows)\n",
    "    overview = pd.concat([stats, preview], ignore_index=True)\n",
    "    row_labels = [label for label, _ in labelled_stats]\n",
    "    row_labels += ['真实数据'] * len(preview)\n",
    "    overview.insert(0, 'new_col', row_labels)\n",
    "\n",
    "    print('----------------整体概况----------------')\n",
    "    print(overview)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------行数和列数----------------\n",
      "(700, 38)\n",
      "----------------整体概况----------------\n",
      "   new_col policy_id    age customer_months policy_bind_date policy_state policy_csl policy_deductable policy_annual_premium umbrella_limit insured_zip insured_sex insured_education_level insured_occupation insured_hobbies insured_relationship capital-gains capital-loss incident_date             incident_type   collision_type incident_severity authorities_contacted incident_state incident_city incident_hour_of_the_day number_of_vehicles_involved property_damage bodily_injuries witnesses police_report_available total_claim_amount injury_claim property_claim vehicle_claim   auto_make auto_model auto_year  fraud\n",
      "0     数据类型     int64  int64           int64           object       object     object             int64               float64          int64       int64      object                  object             object          object               object         int64        int64        object                    object           object            object                object         object        object                    int64                       int64          object           int64     int64                  object              int64        int64          int64         int64      object     object     int64  int64\n",
      "1     空值数量         0      0               0                0            0          0                 0                     0              0           0           0                       0                  0               0                    0             0            0             0                         0                0                 0                     0              0             0                        0                           0               0               0         0                       0                  0            0              0             0           0          0         0      0\n",
      "2      最小值     96771     19               0       1989-12-25            A    100/300               500                   412       -1000000      412997      FEMALE               Associate       adm-clerical    base-jumping              husband             0      -109100    2014-12-07   Multi-vehicle Collision                ?      Major Damage             Ambulance             S1     Arlington                        0                           1               ?               0         0                       ?                100            0              0            70      Accura   3 Series      1995      0\n",
      "3      最大值   1045409     63             498       2015-03-08            C   500/1000              2000                  2005       10000000      649422        MALE                     PhD   transport-moving        yachting                 wife         98289            0    2015-03-29             Vehicle Theft   Side Collision    Trivial Damage                Police             S7   Springfield                       23                           4             YES               2         3                     YES             120666        21652          23812         78446  Volkswagen         X6      2015      1\n",
      "4      平均值    549625     39             205              NaN          NaN        NaN              1148                  1247        1100000      502797         NaN                     NaN                NaN             NaN                  NaN         25842       -26247           NaN                       NaN              NaN               NaN                   NaN            NaN           NaN                       12                           2             NaN               1         1                     NaN              52423         7450           7332         37688         NaN        NaN      2005      0\n",
      "5       方差    259068      9             116              NaN          NaN        NaN               612                   251        2282922       74251         NaN                     NaN                NaN             NaN                  NaN         28108        28465           NaN                       NaN              NaN               NaN                   NaN            NaN           NaN                        7                           1             NaN               1         1                     NaN              26179         4889           4787         18724         NaN        NaN         6      0\n",
      "6     真实数据    122576     37             189       2013-08-21            C   500/1000              1000                  1466        5000000      455456      FEMALE                 Masters    protective-serv         reading        not-in-family         62203            0    2014-12-22  Single Vehicle Collision   Side Collision        Total Loss             Ambulance             S5     Riverwood                       21                           1               ?               0         3                       ?              54930         6029           5752         44452      Nissan     Maxima      2000      0\n",
      "7     真实数据    937713     44             234       1998-01-04            B    250/500               500                   821              0      591805        MALE                      JD       craft-repair            polo       other-relative         31606            0    2015-02-18   Multi-vehicle Collision   Side Collision      Minor Damage                 Other             S5   Springfield                        4                           3               ?               2         1                     YES              50680         5376          10156         37347       Honda      Civic      1996      0\n",
      "8     真实数据    680237     33              23       1996-02-06            B   500/1000              1000                  1844              0      442490      FEMALE             High School  machine-op-inspct       skydiving                 wife             0       -43166    2015-01-18  Single Vehicle Collision   Side Collision        Total Loss                Police             S3     Northbend                        0                           1               ?               2         1                      NO              47829         4460           9247         33644        Jeep   Wrangler      2002      0\n",
      "9     真实数据    513080     42             210       2008-11-14            A   500/1000               500                  1867              0      439408        MALE                      JD   transport-moving     video-games            own-child             0       -49440    2015-02-02   Multi-vehicle Collision  Front Collision      Major Damage                  Fire             S3     Northbend                       20                           3             YES               2         2                     YES              68862        11043           5955         53548      Suburu     Legacy      2003      1\n",
      "10    真实数据    192875     29              81       2002-01-08            A    100/300              1000                   816              0      640575      FEMALE                      MD       craft-repair     video-games            own-child         75296       -73689    2015-02-09   Multi-vehicle Collision   Rear Collision        Total Loss                  Fire             S2     Northbend                        9                           3             YES               2         1                     YES              59726         5617          10301         41550        Ford       F150      2004      0\n"
     ]
    }
   ],
   "source": [
    "# Print the shape and the overview table for the training frame.\n",
    "overViewAnalysis(X_train)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
